{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "6e711393-7a75-17e3-539c-9169c1ae1225"
   },
   "source": [
    "It seems the current [high scoring script][1] is written in R using H2O. So let us do one in python using XGBoost. \n",
    "\n",
    "Thanks to [this script][2] for feature engineering ideas. \n",
    "\n",
    "We shall start with importing the necessary modules\n",
    "\n",
    "\n",
    "  [1]: https://www.kaggle.com/gospursgo/two-sigma-connect-rental-listing-inquiries/h2o-starter-pack/run/835757\n",
    "  [2]: https://www.kaggle.com/aikinogard/two-sigma-connect-rental-listing-inquiries/random-forest-starter-with-numerical-features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "_cell_guid": "1952347b-6dc9-b9f1-fa25-94587a2aee77"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import operator\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import sparse\n",
    "import xgboost as xgb\n",
    "from sklearn import model_selection, preprocessing, ensemble\n",
    "from sklearn.metrics import log_loss\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "d7d59f0a-0026-8e33-6236-31637173734f"
   },
   "source": [
    "Now let us write a custom function to run the xgboost model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "_cell_guid": "af6e68af-f7a8-b0ac-c565-1d04818258f9"
   },
   "outputs": [],
   "source": [
    "def runXGB(train_X, train_y, test_X, test_y=None, feature_names=None, seed_val=0, num_rounds=1000):\n",
    "    \"\"\"Train a 3-class XGBoost model and predict on ``test_X``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    train_X, train_y : training features and labels, handed to ``xgb.DMatrix``.\n",
    "    test_X : features to predict on.\n",
    "    test_y : optional labels for ``test_X``. When given, ``test_X`` is also used\n",
    "        as the evaluation set and training stops once its mlogloss has not\n",
    "        improved for 20 rounds.\n",
    "    feature_names : accepted for interface compatibility; not used here.\n",
    "    seed_val : value of the XGBoost ``seed`` parameter.\n",
    "    num_rounds : maximum number of boosting rounds.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (pred_test_y, model) : the ``multi:softprob`` predictions for ``test_X``\n",
    "        and the trained booster.\n",
    "    \"\"\"\n",
    "    param = {}\n",
    "    param['objective'] = 'multi:softprob'\n",
    "    param['eta'] = 0.1\n",
    "    param['max_depth'] = 6\n",
    "    param['silent'] = 1\n",
    "    param['num_class'] = 3\n",
    "    param['eval_metric'] = \"mlogloss\"\n",
    "    param['min_child_weight'] = 1\n",
    "    param['subsample'] = 0.7\n",
    "    param['colsample_bytree'] = 0.7\n",
    "    param['seed'] = seed_val\n",
    "\n",
    "    plst = list(param.items())\n",
    "    xgtrain = xgb.DMatrix(train_X, label=train_y)\n",
    "\n",
    "    if test_y is None:\n",
    "        # No labels to validate against: train for the full number of rounds.\n",
    "        xgtest = xgb.DMatrix(test_X)\n",
    "        model = xgb.train(plst, xgtrain, num_rounds)\n",
    "        return model.predict(xgtest), model\n",
    "\n",
    "    xgtest = xgb.DMatrix(test_X, label=test_y)\n",
    "    watchlist = [ (xgtrain,'train'), (xgtest, 'test') ]\n",
    "    model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=20)\n",
    "\n",
    "    # xgb.train returns the booster from the LAST round, which can be up to 20\n",
    "    # rounds past the best one, so restrict prediction to the best iteration.\n",
    "    best_iteration = getattr(model, 'best_iteration', None)\n",
    "    if best_iteration is None:\n",
    "        pred_test_y = model.predict(xgtest)\n",
    "    else:\n",
    "        try:\n",
    "            pred_test_y = model.predict(xgtest, iteration_range=(0, best_iteration + 1))\n",
    "        except TypeError:\n",
    "            # Older xgboost releases have no iteration_range keyword.\n",
    "            pred_test_y = model.predict(xgtest, ntree_limit=model.best_ntree_limit)\n",
    "    return pred_test_y, model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "c4a69cea-cb06-5d6a-83b7-16ee8ee241f6"
   },
   "source": [
    "Let us read the train and test files and store them as DataFrames."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "_cell_guid": "0108ce34-5e84-7f49-bd6f-6562d60a9082"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(49352, 15)\n",
      "(74659, 14)\n"
     ]
    }
   ],
   "source": [
    "# Locate the competition files and read both splits into DataFrames.\n",
    "data_path = \"../input/\"\n",
    "train_file = os.path.join(data_path, \"train.json\")\n",
    "test_file = os.path.join(data_path, \"test.json\")\n",
    "train_df, test_df = pd.read_json(train_file), pd.read_json(test_file)\n",
    "\n",
    "# Quick sanity check on the (rows, columns) of each split.\n",
    "print(train_df.shape)\n",
    "print(test_df.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "2bf65ce4-7375-c8e9-97d5-621736f3338d"
   },
   "source": [
    "We do not need any pre-processing for numerical features and so create a list with those features."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "_cell_guid": "6462885f-97de-b2d1-2c1a-1958115c4c4d"
   },
   "outputs": [],
   "source": [
    "features_to_use  = [\"bathrooms\", \"bedrooms\", \"latitude\", \"longitude\", \"price\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "b7670810-6d0b-89d0-629e-f99624421229"
   },
   "source": [
    "Now let us create some new features from the given features."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "_cell_guid": "e3b81db5-929d-b8b8-141c-1bbb4a5eaaf3"
   },
   "outputs": [],
   "source": [
    "# Columns added to the model's feature list by this cell, in model order.\n",
    "# (\"listing_id\" is an existing column; the others are derived below.)\n",
    "new_features = [\"num_photos\", \"num_features\", \"num_description_words\", \"created_year\", \"created_month\", \"created_day\", \"listing_id\", \"created_hour\"]\n",
    "\n",
    "# Build every derived column on both splits with the same code path.\n",
    "# NOTE: \"num_features\" must be computed while \"features\" is still a list,\n",
    "# i.e. before the cell that flattens that column into a single string.\n",
    "for split_df in (train_df, test_df):\n",
    "    # count of photos #\n",
    "    split_df[\"num_photos\"] = split_df[\"photos\"].apply(len)\n",
    "\n",
    "    # count of \"features\" #\n",
    "    split_df[\"num_features\"] = split_df[\"features\"].apply(len)\n",
    "\n",
    "    # count of space-separated tokens in the description column #\n",
    "    split_df[\"num_description_words\"] = split_df[\"description\"].apply(lambda x: len(x.split(\" \")))\n",
    "\n",
    "    # convert the created column to datetime so that date parts can be extracted\n",
    "    split_df[\"created\"] = pd.to_datetime(split_df[\"created\"])\n",
    "\n",
    "    # year, month, day and hour of the creation timestamp #\n",
    "    for part in (\"year\", \"month\", \"day\", \"hour\"):\n",
    "        split_df[\"created_\" + part] = getattr(split_df[\"created\"].dt, part)\n",
    "\n",
    "# Register the new columns only once, so re-running this cell does not add\n",
    "# duplicate columns to the model input.\n",
    "features_to_use.extend([name for name in new_features if name not in features_to_use])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "3d9aa966-66a2-8ff8-2459-40e0187418a2"
   },
   "source": [
    "We have 4 categorical features in our data\n",
    "\n",
    " - display_address\n",
    " - manager_id\n",
    " - building_id\n",
    " - street_address\n",
    "\n",
    "So let us label encode these features."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "_cell_guid": "af410ae2-6197-adce-ee68-360aa59eff7e"
   },
   "outputs": [],
   "source": [
    "categorical = [\"display_address\", \"manager_id\", \"building_id\", \"street_address\"]\n",
    "for col in categorical:\n",
    "    # Only object-dtype columns are encoded; anything else is left untouched.\n",
    "    if train_df[col].dtype != 'object':\n",
    "        continue\n",
    "    train_values = list(train_df[col].values)\n",
    "    test_values = list(test_df[col].values)\n",
    "    # Fit on train + test together so every value seen in either split gets a code.\n",
    "    encoder = preprocessing.LabelEncoder().fit(train_values + test_values)\n",
    "    train_df[col] = encoder.transform(train_values)\n",
    "    test_df[col] = encoder.transform(test_values)\n",
    "    features_to_use.append(col)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "3f550f0f-0c6f-2432-2c07-d507632eaa2b"
   },
   "source": [
    "The `features` column holds a list of strings for each listing. So we first join the words of each feature with underscores, combine all of a listing's features into a single space-separated string, and then apply a count vectorizer on top of it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "_cell_guid": "d1ea3504-a12c-023a-bce6-d4f93ddb8019"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10                                                         \n",
      "10000     Doorman Elevator Fitness_Center Cats_Allowed D...\n",
      "100004    Laundry_In_Building Dishwasher Hardwood_Floors...\n",
      "100007                               Hardwood_Floors No_Fee\n",
      "100013                                              Pre-War\n",
      "Name: features, dtype: object\n"
     ]
    }
   ],
   "source": [
    "def flatten_features(x):\n",
    "    \"\"\"Turn a list of feature phrases into one space-separated string.\n",
    "\n",
    "    Spaces inside a phrase become underscores so each phrase stays a single\n",
    "    token, e.g. [\"Hardwood Floors\", \"No Fee\"] -> \"Hardwood_Floors No_Fee\".\n",
    "    A value that is already a string is returned unchanged, so re-running\n",
    "    this cell does not iterate over characters and corrupt the column.\n",
    "    \"\"\"\n",
    "    if isinstance(x, str):\n",
    "        return x\n",
    "    return \" \".join([i.replace(\" \", \"_\") for i in x])\n",
    "\n",
    "train_df['features'] = train_df[\"features\"].apply(flatten_features)\n",
    "test_df['features'] = test_df[\"features\"].apply(flatten_features)\n",
    "print(train_df[\"features\"].head())\n",
    "\n",
    "# NOTE: despite its name, `tfidf` is a plain CountVectorizer (raw token counts).\n",
    "# The vocabulary is learned on the training split only and reused for test.\n",
    "tfidf = CountVectorizer(stop_words='english', max_features=200)\n",
    "tr_sparse = tfidf.fit_transform(train_df[\"features\"])\n",
    "te_sparse = tfidf.transform(test_df[\"features\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "2bfbcacc-e821-654b-f2b3-cda0f1a5a20b"
   },
   "source": [
    "Now let us stack both the dense and sparse features into a single dataset and also get the target variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "_cell_guid": "9eeef912-2104-e97e-1948-c246652340e1"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(49352, 217) (74659, 217)\n"
     ]
    }
   ],
   "source": [
    "# Encode the target classes as the integer labels used for training.\n",
    "target_num_map = {'high': 0, 'medium': 1, 'low': 2}\n",
    "train_y = np.array(train_df['interest_level'].apply(lambda level: target_num_map[level]))\n",
    "\n",
    "# Put the selected dense columns and the sparse token counts side by side (CSR format).\n",
    "train_X = sparse.hstack([train_df[features_to_use], tr_sparse]).tocsr()\n",
    "test_X = sparse.hstack([test_df[features_to_use], te_sparse]).tocsr()\n",
    "print(train_X.shape, test_X.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "7d2e5fb7-7886-68b1-326f-6db491215001"
   },
   "source": [
    "Now let us do some cross validation to check the scores. \n",
    "\n",
    "Please run it locally to get the CV scores. I am commenting it out here to save time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "_cell_guid": "13fd60b9-a8b5-c76f-1fbd-2a56219da0d2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\ttrain-mlogloss:1.04114\ttest-mlogloss:1.04219\n",
      "Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.\n",
      "\n",
      "Will train until test-mlogloss hasn't improved in 20 rounds.\n",
      "[1]\ttrain-mlogloss:0.988799\ttest-mlogloss:0.990721\n",
      "[2]\ttrain-mlogloss:0.944048\ttest-mlogloss:0.94691\n",
      "[3]\ttrain-mlogloss:0.90518\ttest-mlogloss:0.908812\n",
      "[4]\ttrain-mlogloss:0.8718\ttest-mlogloss:0.876215\n",
      "[5]\ttrain-mlogloss:0.841498\ttest-mlogloss:0.847057\n",
      "[6]\ttrain-mlogloss:0.815614\ttest-mlogloss:0.821795\n",
      "[7]\ttrain-mlogloss:0.79312\ttest-mlogloss:0.799993\n",
      "[8]\ttrain-mlogloss:0.773194\ttest-mlogloss:0.780815\n",
      "[9]\ttrain-mlogloss:0.754598\ttest-mlogloss:0.763247\n",
      "[10]\ttrain-mlogloss:0.738162\ttest-mlogloss:0.747594\n",
      "[11]\ttrain-mlogloss:0.724634\ttest-mlogloss:0.734739\n",
      "[12]\ttrain-mlogloss:0.711331\ttest-mlogloss:0.722318\n",
      "[13]\ttrain-mlogloss:0.699821\ttest-mlogloss:0.711481\n",
      "[14]\ttrain-mlogloss:0.689142\ttest-mlogloss:0.701381\n",
      "[15]\ttrain-mlogloss:0.678446\ttest-mlogloss:0.691482\n",
      "[16]\ttrain-mlogloss:0.669268\ttest-mlogloss:0.683158\n",
      "[17]\ttrain-mlogloss:0.66185\ttest-mlogloss:0.67647\n",
      "[18]\ttrain-mlogloss:0.654386\ttest-mlogloss:0.669772\n",
      "[19]\ttrain-mlogloss:0.648071\ttest-mlogloss:0.664241\n",
      "[20]\ttrain-mlogloss:0.642589\ttest-mlogloss:0.659292\n",
      "[21]\ttrain-mlogloss:0.637133\ttest-mlogloss:0.654492\n",
      "[22]\ttrain-mlogloss:0.632064\ttest-mlogloss:0.650024\n",
      "[23]\ttrain-mlogloss:0.627592\ttest-mlogloss:0.646221\n",
      "[24]\ttrain-mlogloss:0.622447\ttest-mlogloss:0.641828\n",
      "[25]\ttrain-mlogloss:0.618027\ttest-mlogloss:0.638092\n",
      "[26]\ttrain-mlogloss:0.614181\ttest-mlogloss:0.635053\n",
      "[27]\ttrain-mlogloss:0.61114\ttest-mlogloss:0.632717\n",
      "[28]\ttrain-mlogloss:0.607278\ttest-mlogloss:0.629888\n",
      "[29]\ttrain-mlogloss:0.603595\ttest-mlogloss:0.627116\n",
      "[30]\ttrain-mlogloss:0.600566\ttest-mlogloss:0.624912\n",
      "[31]\ttrain-mlogloss:0.597396\ttest-mlogloss:0.622441\n",
      "[32]\ttrain-mlogloss:0.594581\ttest-mlogloss:0.620373\n",
      "[33]\ttrain-mlogloss:0.591807\ttest-mlogloss:0.618497\n",
      "[34]\ttrain-mlogloss:0.589131\ttest-mlogloss:0.616384\n",
      "[35]\ttrain-mlogloss:0.586585\ttest-mlogloss:0.614496\n",
      "[36]\ttrain-mlogloss:0.583978\ttest-mlogloss:0.612716\n",
      "[37]\ttrain-mlogloss:0.582015\ttest-mlogloss:0.611317\n",
      "[38]\ttrain-mlogloss:0.579514\ttest-mlogloss:0.609588\n",
      "[39]\ttrain-mlogloss:0.576912\ttest-mlogloss:0.607814\n",
      "[40]\ttrain-mlogloss:0.574746\ttest-mlogloss:0.606454\n",
      "[41]\ttrain-mlogloss:0.572975\ttest-mlogloss:0.605284\n",
      "[42]\ttrain-mlogloss:0.570366\ttest-mlogloss:0.603354\n",
      "[43]\ttrain-mlogloss:0.568138\ttest-mlogloss:0.602107\n",
      "[44]\ttrain-mlogloss:0.565862\ttest-mlogloss:0.600475\n",
      "[45]\ttrain-mlogloss:0.564646\ttest-mlogloss:0.599563\n",
      "[46]\ttrain-mlogloss:0.562649\ttest-mlogloss:0.598221\n",
      "[47]\ttrain-mlogloss:0.560823\ttest-mlogloss:0.597094\n",
      "[48]\ttrain-mlogloss:0.559184\ttest-mlogloss:0.596101\n",
      "[49]\ttrain-mlogloss:0.557642\ttest-mlogloss:0.595268\n",
      "[50]\ttrain-mlogloss:0.555695\ttest-mlogloss:0.594217\n",
      "[51]\ttrain-mlogloss:0.553391\ttest-mlogloss:0.593256\n",
      "[52]\ttrain-mlogloss:0.551141\ttest-mlogloss:0.592129\n",
      "[53]\ttrain-mlogloss:0.549666\ttest-mlogloss:0.591489\n",
      "[54]\ttrain-mlogloss:0.547321\ttest-mlogloss:0.590389\n",
      "[55]\ttrain-mlogloss:0.546197\ttest-mlogloss:0.589846\n",
      "[56]\ttrain-mlogloss:0.544658\ttest-mlogloss:0.589096\n",
      "[57]\ttrain-mlogloss:0.543389\ttest-mlogloss:0.588546\n",
      "[58]\ttrain-mlogloss:0.541408\ttest-mlogloss:0.58737\n",
      "[59]\ttrain-mlogloss:0.540229\ttest-mlogloss:0.586951\n",
      "[60]\ttrain-mlogloss:0.538715\ttest-mlogloss:0.58633\n",
      "[61]\ttrain-mlogloss:0.537227\ttest-mlogloss:0.585638\n",
      "[62]\ttrain-mlogloss:0.535932\ttest-mlogloss:0.585132\n",
      "[63]\ttrain-mlogloss:0.534624\ttest-mlogloss:0.584407\n",
      "[64]\ttrain-mlogloss:0.533186\ttest-mlogloss:0.58367\n",
      "[65]\ttrain-mlogloss:0.531767\ttest-mlogloss:0.582788\n",
      "[66]\ttrain-mlogloss:0.530367\ttest-mlogloss:0.582063\n",
      "[67]\ttrain-mlogloss:0.529023\ttest-mlogloss:0.581331\n",
      "[68]\ttrain-mlogloss:0.527781\ttest-mlogloss:0.58068\n",
      "[69]\ttrain-mlogloss:0.526511\ttest-mlogloss:0.580342\n",
      "[70]\ttrain-mlogloss:0.525392\ttest-mlogloss:0.579888\n",
      "[71]\ttrain-mlogloss:0.52422\ttest-mlogloss:0.579319\n",
      "[72]\ttrain-mlogloss:0.523065\ttest-mlogloss:0.578852\n",
      "[73]\ttrain-mlogloss:0.522163\ttest-mlogloss:0.578434\n",
      "[74]\ttrain-mlogloss:0.520843\ttest-mlogloss:0.577687\n",
      "[75]\ttrain-mlogloss:0.520055\ttest-mlogloss:0.577254\n",
      "[76]\ttrain-mlogloss:0.519149\ttest-mlogloss:0.576857\n",
      "[77]\ttrain-mlogloss:0.517909\ttest-mlogloss:0.57638\n",
      "[78]\ttrain-mlogloss:0.516506\ttest-mlogloss:0.575721\n",
      "[79]\ttrain-mlogloss:0.515361\ttest-mlogloss:0.575472\n",
      "[80]\ttrain-mlogloss:0.514641\ttest-mlogloss:0.575183\n",
      "[81]\ttrain-mlogloss:0.513579\ttest-mlogloss:0.574743\n",
      "[82]\ttrain-mlogloss:0.512622\ttest-mlogloss:0.574371\n",
      "[83]\ttrain-mlogloss:0.511446\ttest-mlogloss:0.574089\n",
      "[84]\ttrain-mlogloss:0.510372\ttest-mlogloss:0.573719\n",
      "[85]\ttrain-mlogloss:0.509183\ttest-mlogloss:0.573575\n",
      "[86]\ttrain-mlogloss:0.508148\ttest-mlogloss:0.573277\n",
      "[87]\ttrain-mlogloss:0.50706\ttest-mlogloss:0.572957\n",
      "[88]\ttrain-mlogloss:0.50622\ttest-mlogloss:0.572635\n",
      "[89]\ttrain-mlogloss:0.505219\ttest-mlogloss:0.572276\n",
      "[90]\ttrain-mlogloss:0.504375\ttest-mlogloss:0.571933\n",
      "[91]\ttrain-mlogloss:0.503762\ttest-mlogloss:0.571746\n",
      "[92]\ttrain-mlogloss:0.502992\ttest-mlogloss:0.571413\n",
      "[93]\ttrain-mlogloss:0.502076\ttest-mlogloss:0.571129\n",
      "[94]\ttrain-mlogloss:0.500902\ttest-mlogloss:0.570822\n",
      "[95]\ttrain-mlogloss:0.500169\ttest-mlogloss:0.570567\n",
      "[96]\ttrain-mlogloss:0.499278\ttest-mlogloss:0.570131\n",
      "[97]\ttrain-mlogloss:0.498181\ttest-mlogloss:0.569639\n",
      "[98]\ttrain-mlogloss:0.497191\ttest-mlogloss:0.569336\n",
      "[99]\ttrain-mlogloss:0.496139\ttest-mlogloss:0.569146\n",
      "[100]\ttrain-mlogloss:0.495544\ttest-mlogloss:0.56896\n",
      "[101]\ttrain-mlogloss:0.494762\ttest-mlogloss:0.568668\n",
      "[102]\ttrain-mlogloss:0.493763\ttest-mlogloss:0.568456\n",
      "[103]\ttrain-mlogloss:0.492945\ttest-mlogloss:0.568271\n",
      "[104]\ttrain-mlogloss:0.491708\ttest-mlogloss:0.567905\n",
      "[105]\ttrain-mlogloss:0.490897\ttest-mlogloss:0.567701\n",
      "[106]\ttrain-mlogloss:0.490114\ttest-mlogloss:0.567514\n",
      "[107]\ttrain-mlogloss:0.48894\ttest-mlogloss:0.567149\n",
      "[108]\ttrain-mlogloss:0.488131\ttest-mlogloss:0.566846\n",
      "[109]\ttrain-mlogloss:0.487414\ttest-mlogloss:0.566577\n",
      "[110]\ttrain-mlogloss:0.486545\ttest-mlogloss:0.566364\n",
      "[111]\ttrain-mlogloss:0.485623\ttest-mlogloss:0.566043\n",
      "[112]\ttrain-mlogloss:0.484816\ttest-mlogloss:0.565925\n",
      "[113]\ttrain-mlogloss:0.484138\ttest-mlogloss:0.565711\n",
      "[114]\ttrain-mlogloss:0.483216\ttest-mlogloss:0.56544\n",
      "[115]\ttrain-mlogloss:0.482588\ttest-mlogloss:0.565323\n",
      "[116]\ttrain-mlogloss:0.481523\ttest-mlogloss:0.565\n",
      "[117]\ttrain-mlogloss:0.48092\ttest-mlogloss:0.564753\n",
      "[118]\ttrain-mlogloss:0.480238\ttest-mlogloss:0.564586\n",
      "[119]\ttrain-mlogloss:0.47942\ttest-mlogloss:0.564378\n",
      "[120]\ttrain-mlogloss:0.478738\ttest-mlogloss:0.564245\n",
      "[121]\ttrain-mlogloss:0.478011\ttest-mlogloss:0.56409\n",
      "[122]\ttrain-mlogloss:0.476949\ttest-mlogloss:0.56384\n",
      "[123]\ttrain-mlogloss:0.476118\ttest-mlogloss:0.563467\n",
      "[124]\ttrain-mlogloss:0.475843\ttest-mlogloss:0.563276\n",
      "[125]\ttrain-mlogloss:0.474954\ttest-mlogloss:0.562983\n",
      "[126]\ttrain-mlogloss:0.474088\ttest-mlogloss:0.562882\n",
      "[127]\ttrain-mlogloss:0.473533\ttest-mlogloss:0.562699\n",
      "[128]\ttrain-mlogloss:0.472967\ttest-mlogloss:0.562539\n",
      "[129]\ttrain-mlogloss:0.472171\ttest-mlogloss:0.562386\n",
      "[130]\ttrain-mlogloss:0.471264\ttest-mlogloss:0.562188\n",
      "[131]\ttrain-mlogloss:0.470706\ttest-mlogloss:0.562049\n",
      "[132]\ttrain-mlogloss:0.469903\ttest-mlogloss:0.561895\n",
      "[133]\ttrain-mlogloss:0.469176\ttest-mlogloss:0.561649\n",
      "[134]\ttrain-mlogloss:0.468483\ttest-mlogloss:0.561359\n",
      "[135]\ttrain-mlogloss:0.467675\ttest-mlogloss:0.561175\n",
      "[136]\ttrain-mlogloss:0.466944\ttest-mlogloss:0.560943\n",
      "[137]\ttrain-mlogloss:0.466573\ttest-mlogloss:0.560931\n",
      "[138]\ttrain-mlogloss:0.465994\ttest-mlogloss:0.560789\n",
      "[139]\ttrain-mlogloss:0.465236\ttest-mlogloss:0.560444\n",
      "[140]\ttrain-mlogloss:0.464364\ttest-mlogloss:0.560345\n",
      "[141]\ttrain-mlogloss:0.463396\ttest-mlogloss:0.560242\n",
      "[142]\ttrain-mlogloss:0.46274\ttest-mlogloss:0.560137\n",
      "[143]\ttrain-mlogloss:0.462101\ttest-mlogloss:0.55996\n",
      "[144]\ttrain-mlogloss:0.461377\ttest-mlogloss:0.559821\n",
      "[145]\ttrain-mlogloss:0.460638\ttest-mlogloss:0.559611\n",
      "[146]\ttrain-mlogloss:0.459958\ttest-mlogloss:0.559478\n",
      "[147]\ttrain-mlogloss:0.459362\ttest-mlogloss:0.559354\n",
      "[148]\ttrain-mlogloss:0.458515\ttest-mlogloss:0.559138\n",
      "[149]\ttrain-mlogloss:0.457808\ttest-mlogloss:0.559009\n",
      "[150]\ttrain-mlogloss:0.45738\ttest-mlogloss:0.558911\n",
      "[151]\ttrain-mlogloss:0.456855\ttest-mlogloss:0.55884\n",
      "[152]\ttrain-mlogloss:0.456063\ttest-mlogloss:0.558697\n",
      "[153]\ttrain-mlogloss:0.455421\ttest-mlogloss:0.558521\n",
      "[154]\ttrain-mlogloss:0.454662\ttest-mlogloss:0.558377\n",
      "[155]\ttrain-mlogloss:0.454117\ttest-mlogloss:0.558296\n",
      "[156]\ttrain-mlogloss:0.453326\ttest-mlogloss:0.558084\n",
      "[157]\ttrain-mlogloss:0.452753\ttest-mlogloss:0.557905\n",
      "[158]\ttrain-mlogloss:0.452359\ttest-mlogloss:0.557868\n",
      "[159]\ttrain-mlogloss:0.451707\ttest-mlogloss:0.557636\n",
      "[160]\ttrain-mlogloss:0.451068\ttest-mlogloss:0.557454\n",
      "[161]\ttrain-mlogloss:0.450408\ttest-mlogloss:0.557361\n",
      "[162]\ttrain-mlogloss:0.449685\ttest-mlogloss:0.557289\n",
      "[163]\ttrain-mlogloss:0.448961\ttest-mlogloss:0.557146\n",
      "[164]\ttrain-mlogloss:0.448501\ttest-mlogloss:0.557029\n",
      "[165]\ttrain-mlogloss:0.447691\ttest-mlogloss:0.556853\n",
      "[166]\ttrain-mlogloss:0.446992\ttest-mlogloss:0.556806\n",
      "[167]\ttrain-mlogloss:0.446296\ttest-mlogloss:0.556598\n",
      "[168]\ttrain-mlogloss:0.445686\ttest-mlogloss:0.556577\n",
      "[169]\ttrain-mlogloss:0.444956\ttest-mlogloss:0.556382\n",
      "[170]\ttrain-mlogloss:0.444435\ttest-mlogloss:0.556329\n",
      "[171]\ttrain-mlogloss:0.443592\ttest-mlogloss:0.556008\n",
      "[172]\ttrain-mlogloss:0.442805\ttest-mlogloss:0.555822\n",
      "[173]\ttrain-mlogloss:0.442412\ttest-mlogloss:0.555704\n",
      "[174]\ttrain-mlogloss:0.441773\ttest-mlogloss:0.555605\n",
      "[175]\ttrain-mlogloss:0.441135\ttest-mlogloss:0.555466\n",
      "[176]\ttrain-mlogloss:0.440742\ttest-mlogloss:0.555388\n",
      "[177]\ttrain-mlogloss:0.44027\ttest-mlogloss:0.555334\n",
      "[178]\ttrain-mlogloss:0.439462\ttest-mlogloss:0.555133\n",
      "[179]\ttrain-mlogloss:0.43881\ttest-mlogloss:0.554992\n",
      "[180]\ttrain-mlogloss:0.438174\ttest-mlogloss:0.554753\n",
      "[181]\ttrain-mlogloss:0.437383\ttest-mlogloss:0.554644\n",
      "[182]\ttrain-mlogloss:0.436838\ttest-mlogloss:0.554575\n",
      "[183]\ttrain-mlogloss:0.436125\ttest-mlogloss:0.554404\n",
      "[184]\ttrain-mlogloss:0.435588\ttest-mlogloss:0.554327\n",
      "[185]\ttrain-mlogloss:0.435114\ttest-mlogloss:0.55427\n",
      "[186]\ttrain-mlogloss:0.434355\ttest-mlogloss:0.554231\n",
      "[187]\ttrain-mlogloss:0.43382\ttest-mlogloss:0.554011\n",
      "[188]\ttrain-mlogloss:0.433208\ttest-mlogloss:0.553862\n",
      "[189]\ttrain-mlogloss:0.43253\ttest-mlogloss:0.553751\n",
      "[190]\ttrain-mlogloss:0.432027\ttest-mlogloss:0.553633\n",
      "[191]\ttrain-mlogloss:0.43148\ttest-mlogloss:0.553609\n",
      "[192]\ttrain-mlogloss:0.431025\ttest-mlogloss:0.553599\n",
      "[193]\ttrain-mlogloss:0.430441\ttest-mlogloss:0.553502\n",
      "[194]\ttrain-mlogloss:0.429787\ttest-mlogloss:0.553418\n",
      "[195]\ttrain-mlogloss:0.429262\ttest-mlogloss:0.553465\n",
      "[196]\ttrain-mlogloss:0.42865\ttest-mlogloss:0.553342\n",
      "[197]\ttrain-mlogloss:0.428045\ttest-mlogloss:0.553264\n",
      "[198]\ttrain-mlogloss:0.427341\ttest-mlogloss:0.553197\n",
      "[199]\ttrain-mlogloss:0.426563\ttest-mlogloss:0.552965\n",
      "[200]\ttrain-mlogloss:0.426066\ttest-mlogloss:0.552906\n",
      "[201]\ttrain-mlogloss:0.42541\ttest-mlogloss:0.552713\n",
      "[202]\ttrain-mlogloss:0.424861\ttest-mlogloss:0.552693\n",
      "[203]\ttrain-mlogloss:0.42421\ttest-mlogloss:0.552601\n",
      "[204]\ttrain-mlogloss:0.423567\ttest-mlogloss:0.552647\n",
      "[205]\ttrain-mlogloss:0.422962\ttest-mlogloss:0.552553\n",
      "[206]\ttrain-mlogloss:0.422326\ttest-mlogloss:0.552551\n",
      "[207]\ttrain-mlogloss:0.421518\ttest-mlogloss:0.55258\n",
      "[208]\ttrain-mlogloss:0.420897\ttest-mlogloss:0.552612\n",
      "[209]\ttrain-mlogloss:0.420392\ttest-mlogloss:0.552503\n",
      "[210]\ttrain-mlogloss:0.420065\ttest-mlogloss:0.552369\n",
      "[211]\ttrain-mlogloss:0.419603\ttest-mlogloss:0.55221\n",
      "[212]\ttrain-mlogloss:0.41903\ttest-mlogloss:0.552108\n",
      "[213]\ttrain-mlogloss:0.418522\ttest-mlogloss:0.551998\n",
      "[214]\ttrain-mlogloss:0.417667\ttest-mlogloss:0.551873\n",
      "[215]\ttrain-mlogloss:0.417187\ttest-mlogloss:0.551808\n",
      "[216]\ttrain-mlogloss:0.416637\ttest-mlogloss:0.551775\n",
      "[217]\ttrain-mlogloss:0.41618\ttest-mlogloss:0.55173\n",
      "[218]\ttrain-mlogloss:0.415826\ttest-mlogloss:0.55165\n",
      "[219]\ttrain-mlogloss:0.415501\ttest-mlogloss:0.551587\n",
      "[220]\ttrain-mlogloss:0.415265\ttest-mlogloss:0.551546\n",
      "[221]\ttrain-mlogloss:0.414692\ttest-mlogloss:0.551359\n",
      "[222]\ttrain-mlogloss:0.414234\ttest-mlogloss:0.551307\n",
      "[223]\ttrain-mlogloss:0.413624\ttest-mlogloss:0.551199\n",
      "[224]\ttrain-mlogloss:0.41308\ttest-mlogloss:0.551012\n",
      "[225]\ttrain-mlogloss:0.41247\ttest-mlogloss:0.550941\n",
      "[226]\ttrain-mlogloss:0.411947\ttest-mlogloss:0.550983\n",
      "[227]\ttrain-mlogloss:0.411371\ttest-mlogloss:0.550967\n",
      "[228]\ttrain-mlogloss:0.41081\ttest-mlogloss:0.550876\n",
      "[229]\ttrain-mlogloss:0.410216\ttest-mlogloss:0.550737\n",
      "[230]\ttrain-mlogloss:0.409747\ttest-mlogloss:0.550653\n",
      "[231]\ttrain-mlogloss:0.409131\ttest-mlogloss:0.550562\n",
      "[232]\ttrain-mlogloss:0.408654\ttest-mlogloss:0.55062\n",
      "[233]\ttrain-mlogloss:0.408119\ttest-mlogloss:0.550529\n",
      "[234]\ttrain-mlogloss:0.407361\ttest-mlogloss:0.550505\n",
      "[235]\ttrain-mlogloss:0.406824\ttest-mlogloss:0.550482\n",
      "[236]\ttrain-mlogloss:0.406348\ttest-mlogloss:0.55042\n",
      "[237]\ttrain-mlogloss:0.406023\ttest-mlogloss:0.550356\n",
      "[238]\ttrain-mlogloss:0.405309\ttest-mlogloss:0.550179\n",
      "[239]\ttrain-mlogloss:0.404664\ttest-mlogloss:0.55013\n",
      "[240]\ttrain-mlogloss:0.404285\ttest-mlogloss:0.550085\n",
      "[241]\ttrain-mlogloss:0.403685\ttest-mlogloss:0.55006\n",
      "[242]\ttrain-mlogloss:0.403308\ttest-mlogloss:0.549991\n",
      "[243]\ttrain-mlogloss:0.402697\ttest-mlogloss:0.549962\n",
      "[244]\ttrain-mlogloss:0.402272\ttest-mlogloss:0.549869\n",
      "[245]\ttrain-mlogloss:0.401685\ttest-mlogloss:0.549878\n",
      "[246]\ttrain-mlogloss:0.401243\ttest-mlogloss:0.549921\n",
      "[247]\ttrain-mlogloss:0.400637\ttest-mlogloss:0.549932\n",
      "[248]\ttrain-mlogloss:0.400319\ttest-mlogloss:0.549812\n",
      "[249]\ttrain-mlogloss:0.399861\ttest-mlogloss:0.549876\n",
      "[250]\ttrain-mlogloss:0.399276\ttest-mlogloss:0.549815\n",
      "[251]\ttrain-mlogloss:0.398666\ttest-mlogloss:0.549829\n",
      "[252]\ttrain-mlogloss:0.398211\ttest-mlogloss:0.549989\n",
      "[253]\ttrain-mlogloss:0.397705\ttest-mlogloss:0.549932\n",
      "[254]\ttrain-mlogloss:0.397121\ttest-mlogloss:0.550049\n",
      "[255]\ttrain-mlogloss:0.396528\ttest-mlogloss:0.550022\n",
      "[256]\ttrain-mlogloss:0.396249\ttest-mlogloss:0.550033\n",
      "[257]\ttrain-mlogloss:0.395951\ttest-mlogloss:0.549966\n",
      "[258]\ttrain-mlogloss:0.395331\ttest-mlogloss:0.549948\n",
      "[259]\ttrain-mlogloss:0.394668\ttest-mlogloss:0.549957\n",
      "[260]\ttrain-mlogloss:0.394171\ttest-mlogloss:0.549973\n",
      "[261]\ttrain-mlogloss:0.39384\ttest-mlogloss:0.549985\n",
      "[262]\ttrain-mlogloss:0.393273\ttest-mlogloss:0.550006\n",
      "[263]\ttrain-mlogloss:0.392843\ttest-mlogloss:0.5499\n",
      "[264]\ttrain-mlogloss:0.392273\ttest-mlogloss:0.549908\n",
      "[265]\ttrain-mlogloss:0.391828\ttest-mlogloss:0.549826\n",
      "[266]\ttrain-mlogloss:0.391468\ttest-mlogloss:0.549805\n",
      "[267]\ttrain-mlogloss:0.390976\ttest-mlogloss:0.549758\n",
      "[268]\ttrain-mlogloss:0.390481\ttest-mlogloss:0.549727\n",
      "[269]\ttrain-mlogloss:0.390038\ttest-mlogloss:0.549707\n",
      "[270]\ttrain-mlogloss:0.389536\ttest-mlogloss:0.549714\n",
      "[271]\ttrain-mlogloss:0.388936\ttest-mlogloss:0.549652\n",
      "[272]\ttrain-mlogloss:0.388576\ttest-mlogloss:0.549666\n",
      "[273]\ttrain-mlogloss:0.388062\ttest-mlogloss:0.549731\n",
      "[274]\ttrain-mlogloss:0.387869\ttest-mlogloss:0.549754\n",
      "[275]\ttrain-mlogloss:0.387572\ttest-mlogloss:0.549816\n",
      "[276]\ttrain-mlogloss:0.387073\ttest-mlogloss:0.549819\n",
      "[277]\ttrain-mlogloss:0.386474\ttest-mlogloss:0.54963\n",
      "[278]\ttrain-mlogloss:0.385841\ttest-mlogloss:0.549673\n",
      "[279]\ttrain-mlogloss:0.385482\ttest-mlogloss:0.549606\n",
      "[280]\ttrain-mlogloss:0.385114\ttest-mlogloss:0.549587\n",
      "[281]\ttrain-mlogloss:0.384674\ttest-mlogloss:0.54955\n",
      "[282]\ttrain-mlogloss:0.384137\ttest-mlogloss:0.549542\n",
      "[283]\ttrain-mlogloss:0.38372\ttest-mlogloss:0.549528\n",
      "[284]\ttrain-mlogloss:0.383234\ttest-mlogloss:0.549464\n",
      "[285]\ttrain-mlogloss:0.38272\ttest-mlogloss:0.549434\n",
      "[286]\ttrain-mlogloss:0.382295\ttest-mlogloss:0.549465\n",
      "[287]\ttrain-mlogloss:0.381834\ttest-mlogloss:0.549379\n",
      "[288]\ttrain-mlogloss:0.38132\ttest-mlogloss:0.54934\n",
      "[289]\ttrain-mlogloss:0.380894\ttest-mlogloss:0.549264\n",
      "[290]\ttrain-mlogloss:0.380498\ttest-mlogloss:0.549247\n",
      "[291]\ttrain-mlogloss:0.380062\ttest-mlogloss:0.549205\n",
      "[292]\ttrain-mlogloss:0.37965\ttest-mlogloss:0.549201\n",
      "[293]\ttrain-mlogloss:0.379019\ttest-mlogloss:0.549211\n",
      "[294]\ttrain-mlogloss:0.378508\ttest-mlogloss:0.549221\n",
      "[295]\ttrain-mlogloss:0.378046\ttest-mlogloss:0.549091\n",
      "[296]\ttrain-mlogloss:0.377815\ttest-mlogloss:0.549071\n",
      "[297]\ttrain-mlogloss:0.377491\ttest-mlogloss:0.549019\n",
      "[298]\ttrain-mlogloss:0.377001\ttest-mlogloss:0.549037\n",
      "[299]\ttrain-mlogloss:0.376494\ttest-mlogloss:0.549011\n",
      "[300]\ttrain-mlogloss:0.376066\ttest-mlogloss:0.548946\n",
      "[301]\ttrain-mlogloss:0.375527\ttest-mlogloss:0.548929\n",
      "[302]\ttrain-mlogloss:0.375013\ttest-mlogloss:0.54892\n",
      "[303]\ttrain-mlogloss:0.374521\ttest-mlogloss:0.549\n",
      "[304]\ttrain-mlogloss:0.373935\ttest-mlogloss:0.549171\n",
      "[305]\ttrain-mlogloss:0.373428\ttest-mlogloss:0.549223\n",
      "[306]\ttrain-mlogloss:0.373039\ttest-mlogloss:0.54916\n",
      "[307]\ttrain-mlogloss:0.372686\ttest-mlogloss:0.549035\n",
      "[308]\ttrain-mlogloss:0.37216\ttest-mlogloss:0.548995\n",
      "[309]\ttrain-mlogloss:0.371648\ttest-mlogloss:0.548941\n",
      "[310]\ttrain-mlogloss:0.371155\ttest-mlogloss:0.548814\n",
      "[311]\ttrain-mlogloss:0.370729\ttest-mlogloss:0.548765\n",
      "[312]\ttrain-mlogloss:0.37032\ttest-mlogloss:0.548888\n",
      "[313]\ttrain-mlogloss:0.369891\ttest-mlogloss:0.548985\n",
      "[314]\ttrain-mlogloss:0.369316\ttest-mlogloss:0.548926\n",
      "[315]\ttrain-mlogloss:0.368816\ttest-mlogloss:0.548971\n",
      "[316]\ttrain-mlogloss:0.368333\ttest-mlogloss:0.548876\n",
      "[317]\ttrain-mlogloss:0.368004\ttest-mlogloss:0.548885\n",
      "[318]\ttrain-mlogloss:0.367705\ttest-mlogloss:0.548927\n",
      "[319]\ttrain-mlogloss:0.367121\ttest-mlogloss:0.548788\n",
      "[320]\ttrain-mlogloss:0.366641\ttest-mlogloss:0.548706\n",
      "[321]\ttrain-mlogloss:0.366203\ttest-mlogloss:0.548571\n",
      "[322]\ttrain-mlogloss:0.365932\ttest-mlogloss:0.548489\n",
      "[323]\ttrain-mlogloss:0.365446\ttest-mlogloss:0.548531\n",
      "[324]\ttrain-mlogloss:0.365172\ttest-mlogloss:0.548617\n",
      "[325]\ttrain-mlogloss:0.364779\ttest-mlogloss:0.548644\n",
      "[326]\ttrain-mlogloss:0.364241\ttest-mlogloss:0.548594\n",
      "[327]\ttrain-mlogloss:0.363824\ttest-mlogloss:0.548602\n",
      "[328]\ttrain-mlogloss:0.3634\ttest-mlogloss:0.548548\n",
      "[329]\ttrain-mlogloss:0.363085\ttest-mlogloss:0.548491\n",
      "[330]\ttrain-mlogloss:0.362653\ttest-mlogloss:0.548437\n",
      "[331]\ttrain-mlogloss:0.362338\ttest-mlogloss:0.548367\n",
      "[332]\ttrain-mlogloss:0.361838\ttest-mlogloss:0.548419\n",
      "[333]\ttrain-mlogloss:0.361572\ttest-mlogloss:0.548516\n",
      "[334]\ttrain-mlogloss:0.361207\ttest-mlogloss:0.548434\n",
      "[335]\ttrain-mlogloss:0.360795\ttest-mlogloss:0.548389\n",
      "[336]\ttrain-mlogloss:0.360272\ttest-mlogloss:0.548249\n",
      "[337]\ttrain-mlogloss:0.359874\ttest-mlogloss:0.548235\n",
      "[338]\ttrain-mlogloss:0.359489\ttest-mlogloss:0.54823\n",
      "[339]\ttrain-mlogloss:0.358986\ttest-mlogloss:0.548271\n",
      "[340]\ttrain-mlogloss:0.358536\ttest-mlogloss:0.548283\n",
      "[341]\ttrain-mlogloss:0.358192\ttest-mlogloss:0.5482\n",
      "[342]\ttrain-mlogloss:0.357849\ttest-mlogloss:0.548229\n",
      "[343]\ttrain-mlogloss:0.357487\ttest-mlogloss:0.54821\n",
      "[344]\ttrain-mlogloss:0.356953\ttest-mlogloss:0.548181\n",
      "[345]\ttrain-mlogloss:0.356421\ttest-mlogloss:0.548106\n",
      "[346]\ttrain-mlogloss:0.355903\ttest-mlogloss:0.548063\n",
      "[347]\ttrain-mlogloss:0.355627\ttest-mlogloss:0.548068\n",
      "[348]\ttrain-mlogloss:0.355334\ttest-mlogloss:0.54803\n",
      "[349]\ttrain-mlogloss:0.354875\ttest-mlogloss:0.548005\n",
      "[350]\ttrain-mlogloss:0.354477\ttest-mlogloss:0.547958\n",
      "[351]\ttrain-mlogloss:0.354084\ttest-mlogloss:0.547862\n",
      "[352]\ttrain-mlogloss:0.353584\ttest-mlogloss:0.54775\n",
      "[353]\ttrain-mlogloss:0.353249\ttest-mlogloss:0.547744\n",
      "[354]\ttrain-mlogloss:0.35303\ttest-mlogloss:0.547778\n",
      "[355]\ttrain-mlogloss:0.352646\ttest-mlogloss:0.547696\n",
      "[356]\ttrain-mlogloss:0.352297\ttest-mlogloss:0.54783\n",
      "[357]\ttrain-mlogloss:0.351894\ttest-mlogloss:0.547775\n",
      "[358]\ttrain-mlogloss:0.351425\ttest-mlogloss:0.54786\n",
      "[359]\ttrain-mlogloss:0.350943\ttest-mlogloss:0.547774\n",
      "[360]\ttrain-mlogloss:0.350602\ttest-mlogloss:0.547771\n",
      "[361]\ttrain-mlogloss:0.350357\ttest-mlogloss:0.547768\n",
      "[362]\ttrain-mlogloss:0.34985\ttest-mlogloss:0.547881\n",
      "[363]\ttrain-mlogloss:0.349465\ttest-mlogloss:0.547835\n",
      "[364]\ttrain-mlogloss:0.348895\ttest-mlogloss:0.547832\n",
      "[365]\ttrain-mlogloss:0.348455\ttest-mlogloss:0.548\n",
      "[366]\ttrain-mlogloss:0.348064\ttest-mlogloss:0.547948\n",
      "[367]\ttrain-mlogloss:0.347629\ttest-mlogloss:0.548026\n",
      "[368]\ttrain-mlogloss:0.347153\ttest-mlogloss:0.547928\n",
      "[369]\ttrain-mlogloss:0.346734\ttest-mlogloss:0.547903\n",
      "[370]\ttrain-mlogloss:0.346251\ttest-mlogloss:0.547871\n",
      "[371]\ttrain-mlogloss:0.345869\ttest-mlogloss:0.547909\n",
      "[372]\ttrain-mlogloss:0.345424\ttest-mlogloss:0.547937\n",
      "[373]\ttrain-mlogloss:0.34505\ttest-mlogloss:0.548001\n",
      "[374]\ttrain-mlogloss:0.344615\ttest-mlogloss:0.547982\n",
      "[375]\ttrain-mlogloss:0.344206\ttest-mlogloss:0.54803\n",
      "Stopping. Best iteration:\n",
      "[355]\ttrain-mlogloss:0.352646\ttest-mlogloss:0.547696\n",
      "\n",
      "[0.54803037236074925]\n"
     ]
    }
   ],
   "source": [
    "# Score one hold-out fold from a shuffled 5-fold split. The loop exits after the\n",
    "# first fold, so cv_scores ends up holding a single log-loss value.\n",
    "cv_scores = []\n",
    "kf = model_selection.KFold(n_splits=5, shuffle=True, random_state=2016)\n",
    "for dev_index, val_index in kf.split(range(train_X.shape[0])):\n",
    "    # Row-slice the feature matrix and the labels into development / validation parts.\n",
    "    dev_X = train_X[dev_index, :]\n",
    "    val_X = train_X[val_index, :]\n",
    "    dev_y = train_y[dev_index]\n",
    "    val_y = train_y[val_index]\n",
    "    # The validation labels are handed to runXGB together with the validation features.\n",
    "    preds, model = runXGB(dev_X, dev_y, val_X, val_y)\n",
    "    fold_loss = log_loss(val_y, preds)\n",
    "    cv_scores.append(fold_loss)\n",
    "    print(cv_scores)\n",
    "    break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "5cff686f-2601-321d-8f81-5fa846ef7562"
   },
   "source": [
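    "Now let us build the final model and get the predictions on the test set. We train for 400 boosting rounds, a little past the best iteration (355) reported by the validation run above."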
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "_cell_guid": "4fb1954d-e3f0-9369-d50c-bd1b615c0077"
   },
   "outputs": [],
   "source": [
    "# Call runXGB on the full training matrix with no validation labels, for 400 rounds.\n",
    "preds, model = runXGB(train_X, train_y, test_X, num_rounds=400)\n",
    "# Label the prediction columns and attach the listing ids before writing the CSV.\n",
    "# NOTE(review): assumes class indices 0/1/2 map to high/medium/low - confirm against\n",
    "# the target encoding used earlier in the notebook.\n",
    "out_df = pd.DataFrame(preds, columns=[\"high\", \"medium\", \"low\"])\n",
    "out_df[\"listing_id\"] = test_df.listing_id.values\n",
    "out_df.to_csv(\"xgb_starter2.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "b23cc080-cd12-dc7d-0877-66806a34bf4c"
   },
   "source": [
    "\n",
    "I hope this gives Python users a good starting point."
   ]
  }
 ],
 "metadata": {
  "_change_revision": 488,
  "_is_fork": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
